library(tidyverse)
## ── Attaching packages ─────────────────────────────────────── tidyverse 1.3.1 ──
## ✓ ggplot2 3.3.5 ✓ purrr 0.3.4
## ✓ tibble 3.1.6 ✓ dplyr 1.0.7
## ✓ tidyr 1.1.3 ✓ stringr 1.4.0
## ✓ readr 2.1.1 ✓ forcats 0.5.1
## ── Conflicts ────────────────────────────────────────── tidyverse_conflicts() ──
## x dplyr::filter() masks stats::filter()
## x dplyr::lag() masks stats::lag()
library(maps)
##
## Attaching package: 'maps'
## The following object is masked from 'package:purrr':
##
## map
library(knitr)
if (!require("gganimate")) install.packages("gganimate")
## Loading required package: gganimate
library(gganimate)
if (!require("transformr")) install.packages("transformr")
## Loading required package: transformr
## Warning: 'units::install_conversion_constant' is deprecated.
## Use 'install_unit' instead.
## See help("Deprecated")
## Warning: 'units::install_conversion_constant' is deprecated.
## Use 'install_unit' instead.
## See help("Deprecated")
## Warning: 'units::install_conversion_constant' is deprecated.
## Use 'install_unit' instead.
## See help("Deprecated")
## Warning: 'units::install_conversion_constant' is deprecated.
## Use 'install_unit' instead.
## See help("Deprecated")
## Warning: 'units::install_conversion_constant' is deprecated.
## Use 'install_unit' instead.
## See help("Deprecated")
library(transformr)
if (!require("plotly")) install.packages("plotly")
## Loading required package: plotly
##
## Attaching package: 'plotly'
## The following object is masked from 'package:ggplot2':
##
## last_plot
## The following object is masked from 'package:stats':
##
## filter
## The following object is masked from 'package:graphics':
##
## layout
library(plotly)
if (!require("kableExtra")) install.packages("kableExtra")
## Loading required package: kableExtra
##
## Attaching package: 'kableExtra'
## The following object is masked from 'package:dplyr':
##
## group_rows
library(kableExtra)
bills <- read_csv("../dataraw/billionaires_2021_10_31.csv")
## Rows: 500 Columns: 10
## ── Column specification ────────────────────────────────────────────────────────
## Delimiter: ","
## chr (6): Name, Total_Net_Worth, LastChange, YTDChange, Country, Industry
## dbl (4): Rank, Total_Net_Worth_Bil, LastChange_Bil, YTDChange_Bil
##
## ℹ Use `spec()` to retrieve the full column specification for this data.
## ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.
glimpse(bills)
## Rows: 500
## Columns: 10
## $ Rank <dbl> 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15,…
## $ Name <chr> "Elon Musk", "Jeff Bezos", "Bernard Arnault", "Bil…
## $ Total_Net_Worth <chr> "$311B", "$195B", "$167B", "$136B", "$131B", "$126…
## $ Total_Net_Worth_Bil <dbl> 311.0, 195.0, 167.0, 136.0, 131.0, 126.0, 121.0, 1…
## $ LastChange <chr> "+$9.32B", "-$3.79B", "-$544M", "+$906M", "+$1.71B…
## $ LastChange_Bil <dbl> 9.3200, -3.7900, -0.5440, 0.9060, 1.7100, 1.6400, …
## $ YTDChange <chr> "+$141B", "+$5.06B", "+$52.7B", "+$4.40B", "+$48.7…
## $ YTDChange_Bil <dbl> 141.00, 5.06, 52.70, 4.40, 48.70, 46.70, 17.30, 37…
## $ Country <chr> "UnitedStates", "UnitedStates", "France", "UnitedS…
## $ Industry <chr> "Technology", "Technology", "Consumer", "Technolog…
is_tibble(bills)
## [1] TRUE
ipeds <- read_csv("https://assets.datacamp.com/production/repositories/1942/datasets/18a000cf70d2fe999c6a6f2b28a7dc9813730e74/ipeds.csv")
## Rows: 3097 Columns: 4
## ── Column specification ────────────────────────────────────────────────────────
## Delimiter: ","
## chr (2): name, sector_label
## dbl (2): lat, lng
##
## ℹ Use `spec()` to retrieve the full column specification for this data.
## ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.
glimpse(ipeds)
## Rows: 3,097
## Columns: 4
## $ name <chr> "A T Still University of Health Sciences", "Abilene Chris…
## $ lat <dbl> 40.19365, 32.46915, 31.48189, 34.06099, 44.85572, 29.6500…
## $ lng <dbl> -92.58918, -99.70954, -83.52828, -118.30118, -93.29981, -…
## $ sector_label <chr> "Private", "Private", "Public", "For-Profit", "For-Profit…
teenNC <- readRDS("../dataprocessed/CDCteenbirthrateNC.rds") %>%
mutate(subregion = tolower(county), # to match county map data
year = as.integer(year)) # to make the animation labels whole numbers
glimpse(teenNC)
## Rows: 1,600
## Columns: 8
## $ year <int> 2003, 2004, 2005, 2006, 2007, 2008, 2009, 2010, 2011, 2012,…
## $ county <chr> "Alamance", "Alamance", "Alamance", "Alamance", "Alamance",…
## $ countyFIPS <dbl> 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 3, 3, 3, 3,…
## $ FIPS <dbl> 37001, 37001, 37001, 37001, 37001, 37001, 37001, 37001, 370…
## $ birth_rate <dbl> 46.2, 46.0, 45.4, 47.0, 47.1, 45.2, 41.7, 36.8, 33.2, 30.8,…
## $ LCL <dbl> 42.2, 43.0, 42.9, 44.5, 44.7, 42.8, 39.5, 34.8, 31.3, 28.9,…
## $ UCL <dbl> 50.5, 49.1, 48.0, 49.5, 49.7, 47.7, 44.1, 38.9, 35.2, 32.7,…
## $ subregion <chr> "alamance", "alamance", "alamance", "alamance", "alamance",…
### County Data from Maps Package
countymapNC <- map_data("county") %>%
filter(region == "north carolina") %>%
select(-region) # not needed since it will be all NC data now
glimpse(countymapNC)
## Rows: 3,669
## Columns: 5
## $ long <dbl> -79.53800, -79.54372, -79.53800, -79.52081, -79.26298, -79.2…
## $ lat <dbl> 35.84424, 35.89008, 35.98175, 36.23385, 36.23385, 35.90726, …
## $ group <dbl> 1857, 1857, 1857, 1857, 1857, 1857, 1857, 1857, 1857, 1857, …
## $ order <int> 54915, 54916, 54917, 54918, 54919, 54920, 54921, 54922, 5492…
## $ subregion <chr> "alamance", "alamance", "alamance", "alamance", "alamance", …
bills_ex <- bills %>%
group_by(Industry) %>%
summarise(Sum = sum(Total_Net_Worth_Bil))
bills_red <- bills %>%
mutate(
Industry = ifelse(
bills$Industry == "Technology" |
bills$Industry == "Industrial" |
bills$Industry == "Finance" |
bills$Industry == "Diversified"|
bills$Industry == "Consumer",
Industry,
"Other"),
LastChange_Prop = (LastChange_Bil + Total_Net_Worth_Bil) / Total_Net_Worth_Bil,
LastChange_Coef = (LastChange_Bil + mean(Total_Net_Worth_Bil)) / mean(Total_Net_Worth_Bil)
) %>%
rename(`Net Worth (Billions)` = Total_Net_Worth_Bil,
`Change YTD` = YTDChange_Bil)
bills_rex <- bills_red %>%
group_by(Industry) %>%
summarise(Sum = sum(`Net Worth (Billions)`))
jitter <- position_jitter(width = 0.2, height = 0, seed = 5)
p <- ggplot(bills_red,
aes(
x = 1,
y = `Change YTD`,
color = Industry,
size = `Net Worth (Billions)`,
text = Name
)) +
geom_point(position = jitter) +
geom_point(shape = 1, colour = "gray60", alpha = .2, position = jitter)+
geom_hline(yintercept=0) +
scale_color_manual(
breaks = c("Technology", "Industrial", "Finance", "Diversified", "Consumer", "Other"),
values = c("#AA4499", "#882255", "#117733", "#DDCC77", "#CC6677", "#88CCEE")) +
scale_y_continuous(n.breaks = 18, minor_breaks = seq(-20, 20, 2)) +
scale_x_continuous(breaks = NULL, minor_breaks = NULL) +
scale_size(range = c(0, 20)) +
labs(
title = "Year-to-Date Change in Net Worth of 500 Wealthiest Individuals",
subtitle = "as of October 31st, 2021",
x = NULL,
y = "Change in Fortune, Billions of Dollars"
) +
theme(axis.ticks.x = element_blank(),
axis.text.x = element_blank())
ggplotly(p, tooltip = c("text", "size", "y", "color"))
This plot shows which billionaires have been making (and losing) the most money so far this year, color-coded by sector of the economy. The first thing which jumps out is how not only does the tech sector make up an outsized proportion of the wealth of the world’s top billionaires, it is also includes nearly all of the top 10 wealthiest individuals. By contrast, the catch-all category “other” includes more total wealth, but is made up of numerous smaller fortunes. It’s also immediately apparent that the individuals making the most money this year are mostly the ones who already have the most money, and again, most of them are in the tech sector. Finally, by hovering over each balloon, it is possible to see the name of the person. By this, we can see just how rapidly Elon Musk’s fortune is ballooning this year, vastly outstripping all of his closest competitors.
mapdata <- countymapNC %>%
full_join(teenNC, by = "subregion")
schoolsNC <- c(
"Appalachian State University",
"East Carolina University",
"Elizabeth City State University",
"Fayetteville State University",
"North Carolina A & T State University",
"North Carolina Central University",
"North Carolina State University at Raleigh",
"University of North Carolina at Asheville",
"University of North Carolina at Chapel Hill",
"University of North Carolina at Charlotte",
"University of North Carolina at Greensboro",
"University of North Carolina at Pembroke",
"University of North Carolina Wilmington",
"University of North Carolina School of the Arts",
"Western Carolina University",
"Winston-Salem State University"
)
ipedsNC <- ipeds %>%
filter (name %in% schoolsNC)
print(ipedsNC)
## # A tibble: 16 × 4
## name lat lng sector_label
## <chr> <dbl> <dbl> <chr>
## 1 Appalachian State University 36.2 -81.7 Public
## 2 East Carolina University 35.6 -77.4 Public
## 3 Elizabeth City State University 36.3 -76.2 Public
## 4 Fayetteville State University 35.1 -78.9 Public
## 5 North Carolina A & T State University 36.1 -79.8 Public
## 6 North Carolina Central University 36.0 -78.9 Public
## 7 North Carolina State University at Raleigh 35.8 -78.7 Public
## 8 University of North Carolina at Asheville 35.6 -82.6 Public
## 9 University of North Carolina at Chapel Hill 35.9 -79.1 Public
## 10 University of North Carolina at Charlotte 35.3 -80.7 Public
## 11 University of North Carolina at Greensboro 36.1 -79.8 Public
## 12 University of North Carolina at Pembroke 34.7 -79.2 Public
## 13 University of North Carolina School of the Arts 36.1 -80.2 Public
## 14 University of North Carolina Wilmington 34.2 -77.9 Public
## 15 Western Carolina University 35.3 -83.2 Public
## 16 Winston-Salem State University 36.1 -80.2 Public
teen_anim <- mapdata %>%
ggplot() +
geom_polygon(aes(x = long,
y = lat,
group = group,
fill = birth_rate),
color = "black") +
coord_map() +
scale_fill_gradient2(low = "white", mid = "lavender", high = "red", midpoint = 30) +
transition_states(year, transition_length = 0, state_length = 2, wrap = TRUE) + #
geom_point(data = ipedsNC,
aes(lng, lat, color = "Location of UNC Campuses"),
size = 1.5) +
theme(plot.title = element_text(hjust = 0.5),
plot.subtitle = element_text(hjust = 0.5)) +
scale_colour_manual(values = "black", na.value = "black") +
labs(title = "Change in Teen Birth Rates in North Carolina",
subtitle = "Year: {closest_state} , per 1,000 females in age group 15–19 years", #
fill = "Teen Birth Rate",
color = NULL,
caption = "Data Source: Centers for Disease Control and Prevention") +
no_axes_theme
animate(teen_anim, duration = 60, end_pause = 20, start_pause = 8)
I was curious if the location of universities was correlated with birth rates in people under 20 years old. Of course, the locations of universities are themselves correlated with urban centers and higher income areas, but there does appear to be a loose correlation. I also wanted to show the change in teen birth rates over time.
Similar to national and global trends, teen birth rates have been generally decreasing across the state over the past two decades. The map shows that areas with the highest rates in 2003 have the most dramatic reductions, but even areas with relatively low rates have seen decreases. Only one county in North Carolina has seen a net increase in the teen birth rate.
sessionInfo()
## R version 3.6.0 (2019-04-26)
## Platform: x86_64-redhat-linux-gnu (64-bit)
## Running under: Red Hat Enterprise Linux
##
## Matrix products: default
## BLAS/LAPACK: /usr/lib64/R/lib/libRblas.so
##
## locale:
## [1] LC_CTYPE=en_US.UTF-8 LC_NUMERIC=C
## [3] LC_TIME=en_US.UTF-8 LC_COLLATE=en_US.UTF-8
## [5] LC_MONETARY=en_US.UTF-8 LC_MESSAGES=en_US.UTF-8
## [7] LC_PAPER=en_US.UTF-8 LC_NAME=C
## [9] LC_ADDRESS=C LC_TELEPHONE=C
## [11] LC_MEASUREMENT=en_US.UTF-8 LC_IDENTIFICATION=C
##
## attached base packages:
## [1] stats graphics grDevices utils datasets methods base
##
## other attached packages:
## [1] kableExtra_1.3.4 plotly_4.10.0 transformr_0.1.3 gganimate_1.0.7
## [5] knitr_1.33 maps_3.4.0 forcats_0.5.1 stringr_1.4.0
## [9] dplyr_1.0.7 purrr_0.3.4 readr_2.1.1 tidyr_1.1.3
## [13] tibble_3.1.6 ggplot2_3.3.5 tidyverse_1.3.1
##
## loaded via a namespace (and not attached):
## [1] fs_1.5.1 sf_0.8-0 lubridate_1.8.0 bit64_4.0.5
## [5] webshot_0.5.2 progress_1.2.2 httr_1.4.2 tools_3.6.0
## [9] backports_1.4.0 bslib_0.3.1 utf8_1.2.2 R6_2.5.1
## [13] KernSmooth_2.23-20 DBI_1.1.1 lazyeval_0.2.2 colorspace_2.0-2
## [17] withr_2.4.3 tidyselect_1.1.1 prettyunits_1.1.1 curl_4.3.2
## [21] bit_4.0.4 compiler_3.6.0 cli_3.1.0 rvest_1.0.2
## [25] xml2_1.3.3 labeling_0.4.2 sass_0.4.0 scales_1.1.1
## [29] classInt_0.4-3 proxy_0.4-26 systemfonts_1.0.3 digest_0.6.29
## [33] rmarkdown_2.11 svglite_2.0.0 pkgconfig_2.0.3 htmltools_0.5.2
## [37] highr_0.9 dbplyr_2.1.1 fastmap_1.1.0 htmlwidgets_1.5.4
## [41] rlang_0.4.12 readxl_1.3.1 rstudioapi_0.13 jquerylib_0.1.4
## [45] farver_2.1.0 generics_0.1.1 jsonlite_1.7.2 crosstalk_1.2.0
## [49] vroom_1.5.7 magrittr_2.0.1 Rcpp_1.0.7 munsell_0.5.0
## [53] fansi_0.5.0 lifecycle_1.0.1 stringi_1.7.6 yaml_2.2.1
## [57] plyr_1.8.6 grid_3.6.0 parallel_3.6.0 crayon_1.4.2
## [61] haven_2.4.3 mapproj_1.2.7 hms_1.1.1 magick_2.7.3
## [65] pillar_1.6.4 lpSolve_5.6.15 reprex_2.0.1 glue_1.5.1
## [69] evaluate_0.14 data.table_1.14.2 modelr_0.1.8 vctrs_0.3.8
## [73] tzdb_0.2.0 tweenr_1.0.2 cellranger_1.1.0 gtable_0.3.0
## [77] assertthat_0.2.1 xfun_0.28 broom_0.7.10 e1071_1.7-9
## [81] class_7.3-19 viridisLite_0.4.0 units_0.7-2 ellipsis_0.3.2